import json
import logging
import os.path
import re
import shutil
import time



from curl_cffi import requests
# Logging setup: every record goes both to a log file and to the console.
log_path = '../static/DOuBai/log.txt'
# FileHandler opens its file as soon as it is constructed and raises
# FileNotFoundError when the parent directory is missing, so create it first.
os.makedirs(os.path.dirname(log_path), exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_path, encoding='utf-8'),  # log file
        logging.StreamHandler()  # console output
    ]
)
# NOTE: this rebinds the name `logging` from the module to a Logger instance.
# The rest of the script calls logging.info() on that logger, so the name is
# kept; the logging *module* is no longer reachable through it after this line.
logging = logging.getLogger(__name__)


# Start every run with an empty "Img" folder: remove whatever a previous run
# left behind, then create the folder again.
img_dir_exists = os.path.exists("Img")
if img_dir_exists:
    shutil.rmtree("Img")
os.makedirs("Img")



# Site root; the image paths scraped from the listing pages are site-relative
# and get this prefix before being downloaded.
base_url = "https://pic.netbian.com"

# Metadata records ({"pic": ..., "title": ...}) accumulated by the crawl loop.
all_datas = list()

# HTTP headers sent with every request: a desktop browser user agent and a
# referer pointing at the site's own home page.
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/141.0.0.0 Safari/537.36 Edg/141.0.0.0"
    ),
    "referer": "https://pic.netbian.com/",
}

# Crawl listing pages 25-499: record each thumbnail's path and title, save the
# thumbnail under Img/, and dump the collected metadata to bian.json.

# Compiled once because the same pattern is applied to every page.
# Group 1 is the <img> src attribute, group 2 the text inside <b>...</b>.
item_pattern = re.compile(
    r'<li><a href=".*?" .*?<img src="(.*?)".*?<b>(.*?)</b>', re.S
)

try:
    for page in range(25, 500):
        # Page 1 has no numeric suffix in its URL. The branch is not reached
        # with the current start page, but it keeps the loop correct if the
        # range is ever lowered to start at 1.
        if page == 1:
            url = "https://pic.netbian.com/index.html"
        else:
            url = f"https://pic.netbian.com/index_{page}.html"

        logging.info(f"正在请求第{page}页: {url}")
        response = requests.get(url, headers=header)
        if response.status_code != 200:
            # Do not parse an error response as if it were a listing page.
            logging.warning(f"第{page}页请求失败，状态码：{response.status_code}，已跳过")
            continue
        # The page text is decoded as GBK before matching.
        response.encoding = "gbk"

        items = item_pattern.findall(response.text)
        print(len(items))
        for item in items:
            logging.info(f"正在爬取中......图片标题是：{item[1]}，封面图片是：{item[0]}")
            all_datas.append({
                "pic": item[0],
                "title": item[1]
            })
            pic_url = base_url + item[0]

            # Build a file name from the title: replace characters that are
            # not allowed in file names, cap the length, never leave it empty.
            clean_title = re.sub(r'[<>:"/\\|?*\x00-\x1F]', '_', item[1])
            clean_title = clean_title[:100]
            if not clean_title:
                clean_title = "unnamed"

            # Download before opening the output file so a failed request
            # does not leave an empty or bogus .jpg behind.
            pic_response = requests.get(pic_url, headers=header)
            if pic_response.status_code != 200:
                logging.warning(
                    f"图片下载失败，状态码：{pic_response.status_code}，已跳过：{pic_url}"
                )
                continue

            # Two items with the same sanitized title would otherwise
            # overwrite each other; add a numeric suffix on collision.
            file_path = f"Img/{clean_title}.jpg"
            duplicate_index = 1
            while os.path.exists(file_path):
                file_path = f"Img/{clean_title}_{duplicate_index}.jpg"
                duplicate_index += 1

            with open(file_path, 'wb') as f:
                f.write(pic_response.content)
            logging.info("正在爬取中......图片下载中")
finally:
    # Always persist what was collected, even when a request raised part-way
    # through the crawl; the exception still propagates afterwards.
    with open("bian.json", 'w', encoding="utf-8") as f:
        json.dump(all_datas, f, ensure_ascii=False, indent=4, separators=(',', ': '))



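# Sample of one listing <li> element, kept as a reference for the regex used
# in the crawl loop (it captures the <img> src and the <b> text):
# <li><a href="/tupian/40176.html"
# title="动漫女孩 桌子 金鱼 水杯子 玻璃杯 5K壁纸" target="_blank">
# <span><img src="/uploads/allimg/251027/201942-1761567582a6e9.jpg"
# alt="动漫女孩 桌子 金鱼 水杯子 玻璃杯 5K壁纸"></span><b>动漫女孩 桌子 金鱼 水杯子 玻璃杯 5K壁纸</b></a></li>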